library(tidyverse)
library(naniar)
library(simputation)
library(ggplot2)
library(plotly)
# library(readr)
# Read the state-level outcome table and its companion context table
# from CSV files in the working directory.
state_data <- read_csv(file = "state_data_updated.csv")
context_state_data <- read_csv(file = "context_state_data.csv")
The state_data csv contains “geographic and population
breakdowns, cohort years and sizes, rate statistics and outcome
statistics.”
The context_state_data csv contains context variables to
support comparison and contextualization of criminal justice caseload
and socioeconomic outcome statistics.
# Preview the first rows of the outcome table (head() defaults to six rows).
head(state_data)
## # A tibble: 6 × 77
## fips cohort_year sex race age_group repeat_contact off_type fe_rate
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 2000 0 0 0 0 0 1554
## 2 4 2000 0 0 0 1 0 NA
## 3 4 2000 0 0 0 2 0 NA
## 4 4 2000 0 0 0 3 0 NA
## 5 4 2000 0 0 0 0 1 302
## 6 4 2000 0 0 0 1 1 NA
## # ℹ 69 more variables: N_fe_rate <dbl>, fe_any_w2_y1 <dbl>,
## # N_fe_any_w2_y1 <dbl>, fe_any_w2_y3 <dbl>, N_fe_any_w2_y3 <dbl>,
## # fe_any_w2_y5 <dbl>, N_fe_any_w2_y5 <dbl>, fe_w2_wages_y1 <dbl>,
## # N_fe_w2_wages_y1 <dbl>, fe_w2_wages_y3 <dbl>, N_fe_w2_wages_y3 <dbl>,
## # fe_w2_wages_y5 <dbl>, N_fe_w2_wages_y5 <dbl>, inc_rate <dbl>,
## # N_inc_rate <dbl>, inc_any_w2_y1 <dbl>, N_inc_any_w2_y1 <dbl>,
## # inc_any_w2_y3 <dbl>, N_inc_any_w2_y3 <dbl>, inc_any_w2_y5 <dbl>, …
# Earliest and latest cohort year present in the data.
# range() returns two values, so reframe() is used: summarize() is
# deprecated (dplyr >= 1.1.0) for results that are not exactly one row.
# The result is still a 2 x 1 tibble with a `range(cohort_year)` column.
state_data %>%
  reframe(range(cohort_year))
## # A tibble: 2 × 1
## `range(cohort_year)`
## <dbl>
## 1 2000
## 2 2022
# Preview the first rows of the context table (head() defaults to six rows).
head(context_state_data)
## # A tibble: 6 × 7
## fips N_contextdata violent property poverty health unemployment
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 01 4886307 438 3322 0.176 1.79 0.059
## 2 02 733306 655 2871 0.106 -0.33 0.07
## 3 04 6933221 420 3212 0.162 -0.15 0.059
## 4 05 2989074 499 3579 0.177 1.47 0.055
## 5 06 38734434 418 2606 0.145 -0.76 0.072
## 6 08 5501284 321 2656 0.113 -0.57 0.049
We can see that the data collected ranges from the years 2000 to 2022.
# fips is read as zero-padded text here ("01", "02", ...); converting to
# integer strips the leading zeros so it lines up with numeric fips codes.
context_state_data <- mutate(context_state_data, fips = as.integer(fips))
# Lookup table mapping each of the 50 states to its numeric FIPS code.
# state.name (base R) lists the 50 states alphabetically, which is the same
# order as the codes below. The codes skip 3, 7, 11, 14, 43 and 52.
fips_codes <- data.frame(
  state = state.name,
  # c() of doubles and integer ranges yields a double vector
  fips = c(1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 15:42, 44:51, 53:56)
)
# Attach state names to both tables by fips code. inner_join() keeps only
# rows whose fips value has a match in fips_codes.
state_data <- inner_join(state_data, fips_codes, by = "fips")
context_state_data <- inner_join(context_state_data, fips_codes, by = "fips")
# state_data %>%
# summary()
# Split the outcomes into one table per category (fe_, inc_, mi_ columns).
# Only the rows where sex, race, age_group, off_type and repeat_contact are
# all 0 (the "all" level of each breakdown) are kept; the filter is the same
# for every table, so it is applied once.
overall_state_data <- filter(
  state_data,
  sex == 0, race == 0, age_group == 0, off_type == 0, repeat_contact == 0
)

# Each table keeps the rate, the share with any W2 in year 1, year-1 W2
# wages and their N_ columns, then drops rows containing any NA.
fe_state_data <- na.omit(select(
  overall_state_data,
  fips, state, cohort_year,
  fe_rate, N_fe_rate,
  fe_any_w2_y1, N_fe_any_w2_y1,
  fe_w2_wages_y1, N_fe_w2_wages_y1
))

inc_state_data <- na.omit(select(
  overall_state_data,
  fips, state, cohort_year,
  inc_rate, N_inc_rate,
  inc_any_w2_y1, N_inc_any_w2_y1,
  inc_w2_wages_y1, N_inc_w2_wages_y1
))

mi_state_data <- na.omit(select(
  overall_state_data,
  fips, state, cohort_year,
  mi_rate, N_mi_rate,
  mi_any_w2_y1, N_mi_any_w2_y1,
  mi_w2_wages_y1, N_mi_w2_wages_y1
))
# Number of observations (rows) in each of the three subsets
fe_state_data %>% nrow()
## [1] 180
inc_state_data %>% nrow()
## [1] 203
mi_state_data %>% nrow()
## [1] 165
# Give the three tables parallel column names and drop the
# N_*_w2_wages_y1 column from each.
# rename() takes new_name = old_name and keeps column positions.

# felony
fe_data <- fe_state_data %>%
  select(-N_fe_w2_wages_y1) %>%
  rename(
    year = cohort_year,
    state_pop = N_fe_rate,
    cohort_size = N_fe_any_w2_y1
  )

# incarcerated
inc_data <- inc_state_data %>%
  select(-N_inc_w2_wages_y1) %>%
  rename(
    year = cohort_year,
    state_pop = N_inc_rate,
    cohort_size = N_inc_any_w2_y1
  )

# misdemeanor
mi_data <- mi_state_data %>%
  select(-N_mi_w2_wages_y1) %>%
  rename(
    year = cohort_year,
    state_pop = N_mi_rate,
    cohort_size = N_mi_any_w2_y1
  )
Variable descriptions and details
- fips/state: state in which… (different for each variable).
- year: based on charge disposition date, or when an individual was convicted.
- fe_rate: per capita rate of felony charges per 100,000 residents, based on charge filing date.
- state_pop: population of the state.
- fe_any_w2_y1: proportion of individuals with at least one W2 filed one year after a felony charge, per cohort per state.
- cohort_size: group of people with felony charges, based on the date the charge was filed.
- fe_w2_wages_y1: average income of individuals one year after a felony charge.
Variables for other categories or data sets are similar.
# Draw a yearly W2-filing proportion as points and lines, one panel per state.
# The three category plots differ only in data, y column and title, so the
# shared layers live in this helper.
#   data:  data frame containing `year`, `state` and the outcome column
#   y_var: unquoted name of the outcome column to put on the y axis
#   title: plot title
# Returns the ggplot object; calling it at top level prints the plot.
plot_w2_by_state <- function(data, y_var, title) {
  ggplot(data, aes(x = year, y = {{ y_var }})) +
    geom_point(size = 1) +
    geom_line() +
    facet_wrap(~state) +
    labs(
      title = title,
      x = "Year",
      y = "Proportion of Individuals with W2 Filed"
    ) +
    theme_minimal()
}

# felony
plot_w2_by_state(
  fe_data, fe_any_w2_y1,
  "Yearly Proportion of Individuals Filing W2 One Year After Felony Charge by State"
)

# incarcerated
plot_w2_by_state(
  inc_data, inc_any_w2_y1,
  "Yearly Proportion of Individuals Filing W2 One Year After Release by State"
)

# misdemeanor
plot_w2_by_state(
  mi_data, mi_any_w2_y1,
  "Yearly Proportion of Individuals Filing W2 One Year After Misdemeanor Charge by State"
)
# Stack the three category tables into one long table with a shared outcome
# column (any_w2_y1). The list names become the `category` column via .id.
combined_data <- bind_rows(
  list(
    Felony = rename(fe_data, any_w2_y1 = fe_any_w2_y1),
    Incarcerated = rename(inc_data, any_w2_y1 = inc_any_w2_y1),
    Misdemeanor = rename(mi_data, any_w2_y1 = mi_any_w2_y1)
  ),
  .id = "category"
) %>%
  select(year, state, category, any_w2_y1)
# One panel per state, with the three categories overlaid as colored series
combined_data %>%
  ggplot(aes(x = year, y = any_w2_y1, color = category)) +
  geom_point(size = 1) +
  geom_line() +
  facet_wrap(~state) +
  labs(
    title = "Yearly Proportion of Individuals Filing W2 One Year After Felony, Incarceration, or Misdemeanor by State",
    x = "Year",
    y = "Proportion of Individuals with W2 Filed",
    color = "Category"
  ) +
  theme_minimal()
# Load the Indeed job-posting data and the pledge data.
indeed <- read_csv(file = "indeed-data.csv")
pledge <- read_csv(file = "pledge-data.csv")

# Lookup of state names and postal abbreviations from base R.
# NOTE: this overwrites the state_data outcome table loaded earlier; from
# here on state_data is only the name/abbreviation lookup.
state_data <- tibble(state = state.name, abb = state.abb)

# No `by` is given, so each join matches on every column name the two
# tables have in common (dplyr reports the columns it used).
indeed <- left_join(indeed, state_data)

# Felony outcomes with the Indeed columns attached, restricted to 2017.
indeedCJARS <- filter(left_join(fe_data, indeed), year == 2017)
# Scatter of W2-filing proportion against avgFair, drawn as state
# abbreviation labels instead of points.
ggplot(indeedCJARS, aes(x = avgFair, y = fe_any_w2_y1, label = abb)) +
  geom_text() +
  labs(
    title = "Felony Charges",
    x = "Fair Chance Job postings per 1,000 postings",
    y = "Proportion of Individuals with W2 Filed"
  ) +
  # geom_smooth(method = "lm") +
  theme_minimal()
# Per-state pledge index: the share of "Yes" among "Yes" and "No" pledge
# records, rounded to two decimals.
pledge_index <- pledge %>%
  group_by(state) %>%
  summarize(
    yes_count = sum(pledge == "Yes"),
    no_count = sum(pledge == "No"),
    index = yes_count / (yes_count + no_count)
  ) %>%
  # A state with no "Yes" or "No" records gives 0 / 0 = NaN, which is set
  # to 0. NOTE(review): sum() has no na.rm here, so a missing pledge value
  # also makes the index NA and therefore 0 -- confirm that is intended.
  mutate(index = if_else(is.na(index), 0, round(index, 2)))
# Add the state_data columns to the index, then attach the Indeed columns.
# Both joins match on whatever column names the tables share.
pledge_index <- left_join(pledge_index, state_data)
indeedPledge <- left_join(pledge_index, indeed)
# Bar chart counting how many rows share each pledge index value,
# rendered as an interactive plotly widget.
plot <- ggplot(indeedPledge, aes(x = index)) +
  geom_bar() +
  labs(
    title = "Count of Pledge Index for all 50 States",
    x = "Pledge Index",
    y = "Count"
  ) +
  theme_minimal()
ggplotly(plot)
# indeedPledge %>%
# ggplot(aes(x = avgFair, y = index, label = abb)) +
# geom_text() +
# labs(title = "Pledges", y = "State Pledge Index", x = "Fair Chance Job postings per 1,000 postings") +
# theme_minimal()
# Number of "Yes" pledge records per employer, largest first.
employer_counts <- pledge %>%
  filter(pledge == "Yes") %>%
  count(employer, sort = TRUE)

# Keep the nine employers with the most "Yes" pledges. slice_max() replaces
# the superseded top_n(); like top_n() it keeps ties by default, so more
# than nine rows can be returned when counts are tied at the cutoff.
top_employers <- employer_counts %>%
  slice_max(order_by = n, n = 9)
# Horizontal bar chart of "Yes" pledge counts, employers ordered by count.
# geom_col() draws bars whose heights are the n values themselves.
plot2 <- ggplot(top_employers, aes(x = reorder(employer, n), y = n)) +
  geom_col() +
  coord_flip() + # employer names read better on the vertical axis
  labs(
    title = "Top 9 Employers with 'Yes' Pledges",
    x = "Employer",
    y = "Number of 'Yes' Pledges"
  ) +
  theme_minimal()
ggplotly(plot2)
There are only 9 unique employers that have signed the Fair Chance Pledge, and 21 of the “Yes” pledge records come from Walmart.
# Simple linear regression of the 2017 W2-filing proportion on avgFair
model <- lm(formula = fe_any_w2_y1 ~ avgFair, data = indeedCJARS)
summary(model)
##
## Call:
## lm(formula = fe_any_w2_y1 ~ avgFair, data = indeedCJARS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.055748 -0.032179 -0.022206 -0.001103 0.115873
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.562533 0.072864 7.720 9.15e-06 ***
## avgFair -0.004087 0.004547 -0.899 0.388
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05442 on 11 degrees of freedom
## Multiple R-squared: 0.06842, Adjusted R-squared: -0.01627
## F-statistic: 0.8079 on 1 and 11 DF, p-value: 0.388
Interpreting the intercept doesn’t make much sense here. It says that when there are zero fair chance postings per 1,000 Indeed job postings, the predicted proportion of individuals with a filed W2 one year after a felony charge is approximately 0.5625.
Interpreting the coefficient: for each additional fair chance job per 1,000 Indeed postings, the predicted proportion of individuals with a filed W2 one year after a felony charge decreases by 0.004087.
The p-value (0.388) is greater than 0.05, so the relationship is not statistically significant. With only 13 states in the model (11 residual degrees of freedom), we cannot conclude that the Indeed fair chance postings explain the proportion of W2s filed.
# Add the pledge index columns to the 2017 felony/Indeed table (joined on
# the column names the two tables share), then regress the W2-filing
# proportion on both avgFair and the pledge index.
allMeasures <- left_join(indeedCJARS, pledge_index)
model2 <- lm(formula = fe_any_w2_y1 ~ avgFair + index, data = allMeasures)
summary(model2)
##
## Call:
## lm(formula = fe_any_w2_y1 ~ avgFair + index, data = allMeasures)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.05826 -0.02841 0.00258 0.01428 0.09347
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.550207 0.064989 8.466 7.15e-06 ***
## avgFair -0.001657 0.004218 -0.393 0.7028
## index -0.169248 0.085108 -1.989 0.0748 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04832 on 10 degrees of freedom
## Multiple R-squared: 0.3324, Adjusted R-squared: 0.1989
## F-statistic: 2.49 on 2 and 10 DF, p-value: 0.1326
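Coefficients:
Holding the pledge index constant, for each additional fair chance job per 1,000 Indeed postings, the proportion of individuals with a filed W2 one year after a felony charge decreases by approximately 0.001657.
The pledge index is the share of a state’s pledge records marked “Yes” (a value between 0 and 1), not a count of businesses. Holding fair chance postings constant, a one-unit increase in the index (from 0 to 1) is associated with a decrease of approximately 0.169248 in the proportion of individuals with a filed W2 one year after a felony charge, i.e. about 0.017 for every 0.1 increase in the index.
Neither coefficient is significant at the 0.05 level, and the negative signs don’t really make sense…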
# Load the policy index table.
policy_stuff <- read_csv(file = "policy_stuff2.csv")

# avgFair against Average_Index, labelled by state, with a fitted
# straight line (no confidence band).
ggplot(policy_stuff, aes(x = Average_Index, y = avgFair, label = state)) +
  geom_text() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Indeed & Average Index",
    x = "Average Index",
    y = "Fair Chance Job postings per 1,000 postings"
  ) +
  theme_minimal()
# avgFair against Negative_Index, labelled by state, with a fitted
# straight line (no confidence band).
ggplot(policy_stuff, aes(x = Negative_Index, y = avgFair, label = state)) +
  geom_text() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Indeed & Negative Index",
    x = "Negative Index",
    y = "Fair Chance Job postings per 1,000 postings"
  ) +
  theme_minimal()
# policy_stuff %>%
# ggplot(aes(x = avgFair, y = Value_1, label= state))+
# geom_text()
#
# policy_stuff %>%
# ggplot(aes(x = avgFair, y = Value_5, label= state, color = Value_4))+
# geom_text()
# Felony outcomes with the policy columns attached (joined on the column
# names the two tables share), restricted to 2017.
policyCJARS <- filter(left_join(fe_data, policy_stuff), year == 2017)
# 2017 W2-filing proportion against Negative_Index, labelled by state, with
# a fitted straight line (no confidence band).
# The title previously read "Indeed & Negative Index", copied from the
# Indeed plots; the y axis here is the W2-filing proportion.
policyCJARS %>%
  ggplot(aes(x = Negative_Index, y = fe_any_w2_y1, label = state)) +
  geom_text() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "W2 Filing & Negative Index",
    x = "Negative Index",
    y = "Proportion of Individuals with W2 Filed"
  ) +
  theme_minimal()
# 2017 W2-filing proportion against Average_Index, labelled by state, with
# a fitted straight line (no confidence band).
# The title previously read "Indeed & Average Index", copied from the
# Indeed plots; the y axis here is the W2-filing proportion.
policyCJARS %>%
  ggplot(aes(x = Average_Index, y = fe_any_w2_y1, label = state)) +
  geom_text() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "W2 Filing & Average Index",
    x = "Average Index",
    y = "Proportion of Individuals with W2 Filed"
  ) +
  theme_minimal()
# policyCJARS %>%
# ggplot(aes(y = fe_any_w2_y1, x = Value_5, label= state, color = Value_4))+
# geom_text() +
# geom_smooth(method = "lm")